/* create_peer_outcomes ********************************************************

Create leave-one-out average peer characteristics at the school-grade-year level

*******************************************************************************/

* Section switches: set a switch to 1 to run the matching section below,
* or to any other value to skip it.
* grade12_peers: build the per-student grade 12, graduation and pre-k flags
local grade12_peers 1
* ma_nsc_clean: build the per-student NSC ever-found flag
local ma_nsc_clean 1
* peer_outcomes: build the leave-one-out peer means
local peer_outcomes 1

if `grade12_peers' == 1 {
	* Section purpose: reduce the SIMS build to one row per sasid holding
	* ever-observed flags for grade 12 attendance, high school graduation and
	* pre-k attendance, saved as gr12_peers.dta.

	* Start from the SIMS build and attach studentno from the crosswalk.
	* The merge keeps unmatched rows from both sides.
	use "$stata_data_sims/all_sasid_full_build.dta", clear
	merge m:1 sasid using "$stata_data_crosswalks/SASID_Studentno.dta", keepusing(studentno)

	* simyear0 is the calendar year in which the school year begins:
	* October records already carry that year, end-of-year records are dated
	* one year later and are shifted back. Any other value of t yields missing.
	gen simyear0 = cond(t == "oct", simyear, cond(t == "eoy", simyear - 1, .))

	* Recode the non-numeric grade labels so grade can be made numeric:
	* kindergarten codes become 0 and pre-k becomes -1. Anything destring
	* cannot convert is forced to missing and then tagged as .a
	replace grade = "0" if inlist(grade,"KF","KP","KT")
	replace grade = "-1" if grade=="PK"
	destring grade, replace force
	replace grade = .a if missing(grade)

	* Record-level outcome flags

		* High school graduation: enstat 4 marks a graduate. The conditional
		* copy starts out identical and is restricted to grade 12 students
		* later, in the peer_outcomes section.
	gen hsgrad = (enstat == 4)
	gen hsgrad_cond12 = hsgrad

		* Grade 12 attendance: a graduating record (enstat 4) in grade 12
		* counts as attending, alongside enrolled records (enstat 1).
	gen ingr12 = grade == 12 & inlist(enstat, 1, 4)

		* Pre-k attendance: enrolled in grade -1 within district code 0035
	gen prek = (district == "0035" & grade == -1 & enstat == 1)

	* One row per student: a flag is 1 if it was ever 1 on any record.
	keep sasid grade ingr12 hsgrad hsgrad_cond12 prek
	collapse (max) ingr12 hsgrad hsgrad_cond12 prek, by(sasid)

	save "$stata_data_sims/gr12_peers.dta", replace

}

if `ma_nsc_clean' == 1 {
	* Section purpose: reduce the appended NSC file to one row per sasid with
	* a flag for ever having a found record, saved as mass_full_nsc.dta.

	* Load the appended NSC extract
	use "$raw_data/MA DESE data/NSC/save/NSC_all_appended02_17_2022.dta", clear

	* Row-level flag: recordfound equal to 2
	* NOTE(review): presumably code 2 means NSC located an enrollment record;
	* confirm against the NSC codebook.
	gen any = recordfound==2

	* Student-level flag: 1 if any of the student's rows is flagged
	bysort sasid: egen everany_nsc = max(any)

	* Keep one row per student; rows without an id are discarded
	keep sasid everany_nsc
	duplicates drop
	drop if missing(sasid)

	* Stop here if sasid does not uniquely identify the remaining rows
	isid sasid

	* Store the id as a string so it can be merged onto the string sasid in SIMS
	tostring sasid, replace
	save "$stata_data_nsc/mass_full_nsc.dta", replace

}

if `peer_outcomes' == 1 {
	* Section purpose: build one record per sasid and grade from SIMS, attach
	* the student-level outcomes saved by the two sections above plus MCAS
	* scores, compute leave-one-out peer means within school-grade-year cells,
	* and save them wide by grade with one row per studentno.

	* Import fully cleaned SIMS dataset
	use "$stata_data_sims/all_sasid_full_build.dta", clear

	*Create a variable that indicates the year a school year starts. If it's october
	*the school year starts at the same year of observation. If it's eoy, we need
	*to go back a year to determine year start.
	gen simyear0 = simyear if t=="oct"
	replace simyear0 = simyear-1 if t=="eoy"

	*Create grade variables for prek and kindergarten (prek = -1, kindergarten = 0).
	*Records whose grade cannot be made numeric are dropped.
	replace grade = "-1" if grade=="PK"
	replace grade = "0" if inlist(grade,"KF","KP","KT")
	destring grade, replace force
	drop if missing(grade)

	*Indicate if the observation is from beginning of year or end of year
	gen period = 0 if t=="oct"
	replace period= 1 if t=="eoy"

	*SIMS school codes in 2001-2002 are formatted differently (6 digits instead of 8).
	*Rebuild them as org_code followed by a four-digit slot holding the last
	*three characters of the original code, so they line up with other years.
	gen temp_name = full_school if inlist(simyear0, 2001, 2002)
	qui replace temp_name = substr(temp_name, -3, .)
	destring temp_name, replace
	destring org_code, replace
	replace temp_name = org_code*10000+temp_name
	tostring temp_name, replace
	replace full_school = temp_name if inlist(simyear0, 2001, 2002)
	drop temp_name org_code
	destring full_school, replace

	*Special Education Results
	*spd_prim_dis spd_natr_svcs spd_lvl_need spd_placement
	*Each flag is missing when the level of need is blank.
	gen spd_lowest_need = spd_lvl_need == "01" if spd_lvl_need != ""
	gen spd_low_need = spd_lvl_need == "02" if spd_lvl_need != ""
	gen spd_mod_need = spd_lvl_need == "03" if spd_lvl_need != ""
	gen spd_high_need = spd_lvl_need == "04" if spd_lvl_need != ""
	gen spd_no_need = spd_lvl_need == "500" if spd_lvl_need != ""
	gen any_spd = spd_no_need != 1 if spd_lvl_need != ""

	* Since 2001 and 2002 SIMS files do not have SPED Level of Need variable, use SPED placement
	* variable instead
	replace spd_no_need = spd_placement == "00" if inlist(simyear0, 2001, 2002)
	replace any_spd = spd_no_need != 1 if spd_placement != "" & inlist(simyear0, 2001, 2002)

	*Track number absences and suspensions
	gen num_absences = member - attend

	*Free/Reduced lunch status (low_inc codes 1 and 2; missing low_inc counts as 0)
	destring low_inc, replace
	gen frpl = inlist(low_inc, 1, 2)

	*Female flag
	gen female = inlist(gender, "F", "f", "2")

	*Race flag
	*From simyear0 2005 onward the raw race codes are remapped into the coding
	*used by the indicator variables below.
	*NOTE(review): the code-to-category mapping is taken as given; confirm
	*against the SIMS data handbook.
	destring race, replace
	gen raceFIX = .
	replace raceFIX = 2 if race == 3 // Asian
	replace raceFIX = 3 if race == 2 // Black
	replace raceFIX = 5 if race == 1 // White
	replace raceFIX = 4 if race == 4 // Native American
	replace raceFIX = 1 if(race<=63 & race>=5) // Other race
	replace raceFIX = 99 if inlist(race, 33, 34, 35, 36, 37) // Hispanic

	replace race = raceFIX if simyear0 >= 2005
	gen asian = race == 2
	gen black = race == 3
	gen white = race == 5
	gen nativeam = race == 4
	gen otherrace = race == 1
	gen hispanic = race == 99

	*Keep only students with enrolled status
	keep if enstat == 1

	*Create total suspensions (missing when either component is missing)
	gen susp = in_susp + out_susp

	*Collapse to one record per sasid and grade: first non-missing school and
	*demographics, earliest school year, ever-flagged lep/any_spd/frpl, and
	*absences and suspensions summed over all of the student's records in the grade
	destring lep, replace
	collapse (firstnm) full_school school asian black white ///
		nativeam otherrace hispanic female ///
		(min) simyear0  ///
		(max) lep any_spd frpl ///
		(sum) num_absences susp, by(sasid grade)

	*Sanity check: a student cannot be flagged both black and hispanic
	gen black_hisp = black + hispanic
	assert black_hisp <= 1

	* Merge on eventual high school grad, pre-k attendance/college-going of peers
	merge m:1 sasid using "$stata_data_sims/gr12_peers.dta", keep(1 3) nogen
	merge m:1 sasid using "$stata_data_nsc/mass_full_nsc.dta", keep(1 3)
	* Students absent from the NSC file are coded as never found
	replace everany_nsc = 0 if _merge == 1
	drop _merge

	* Conditional versions are defined only for students ever seen in grade 12
	replace hsgrad_cond12 = . if ingr12 == 0
	gen everany_nsc_cond12 = everany_nsc
	replace everany_nsc_cond12 = . if ingr12 == 0

	* Merge on sasid-grade test scores
	* NOTE(review): std_e and std_m used below are presumably supplied by this
	* MCAS file; confirm.
	destring sasid, gen(sasid_no)
	merge 1:1 sasid_no grade using "$stata_data_mcas/mcas_sasid_long.dta", keep(1 3) nogen

	save "$stata_data_sims/school_level_peer_chars_before_collapse.dta", replace

	* Create school-level peer outcomes
	* These outcomes should be "leave-one-out" version
	local peerq_vars black_hisp black frpl susp prek hsgrad hsgrad_cond12 everany_nsc everany_nsc_cond12 std_e std_m
	foreach p of local peerq_vars {
		* Count of cell members with a non-missing value. total() is the
		* documented egen function; sum() is its legacy name.
		bysort full_school grade simyear0: egen N`p' = total(!missing(`p'))
		* Cell mean over members with a non-missing value
		bysort full_school grade simyear0: egen mean`p' = mean(`p')
		* Remove the student's own value from the cell mean. A student whose
		* own value is missing never entered the cell mean, so that mean is
		* already the leave-one-out mean and is left untouched. Previously
		* such students were assigned a missing peer mean. A student who is
		* the only non-missing member of a cell still gets missing (0/0).
		replace mean`p' = (N`p' * mean`p' - `p') / (N`p' - 1) if !missing(`p')
	}
	isid sasid_no grade

	* Grade becomes the string suffix for the wide reshape; prek rows are dropped
	tostring grade, replace
	replace grade = "K" if grade == "0"
	drop if grade == "-1"

	* Keep only students present in the sasid to studentno crosswalk
	merge m:1 sasid using "$stata_data_crosswalks/SASID_Studentno.dta", keep(match)

	keep studentno mean* grade

	* One row per studentno, with one set of peer means per grade
	reshape wide mean*, i(studentno) j(grade) string

	save "$stata_data_sims/school_level_peer_chars.dta", replace
}
